######################
#  Replication code for 'Mediating the Electoral Connection', forthcoming in the JOP
#  John Henderson and John Brooks
#  12/7/2015    
######################    

# figure1.R
#  :: produces the synthetic congress voting behavior assuming a constant treatment effect from rain

rm(list=ls())        

setwd('~/Dropbox/rainReplication')

if(length(which(installed.packages()[,1]=='matrixStats'))!=1){
	install.packages('matrixStats')                        
}

library(matrixStats)     

# combine these ideal points and variance; over k     
load('simulations/simruns/irt_ideal-1a_mod.Rdata')
ideal_1=ideal
cutpoint_1=cutpoint  
load('simulations/simruns/irt_ideal-2a_mod.Rdata')
ideal_2=ideal
cutpoint_2=cutpoint  
load('simulations/simruns/irt_ideal-3a_mod.Rdata')
ideal_3=ideal        
cutpoint_3=cutpoint  
load('simulations/simruns/irt_ideal-4a_mod.Rdata')
ideal_4=ideal        
cutpoint_4=cutpoint  

                

# combine these ideal points and variance; over k
# here have the predictions given estimate sizes...
load('simulations/simruns/irt_pred-1a.Rdata')    # base=77;sets=c(78:(78+8))  
bsata1_1=bs1; bsata2_1=bs2; bsata3_1=bs3 
bmata1_1=bm1; bmata2_1=bm2; bmata3_1=bm3 
msata1_1=ms1; msata2_1=ms2; msata3_1=ms3 
mmata1_1=mm1; mmata2_1=mm2; mmata3_1=mm3
load('simulations/simruns/irt_pred-2a.Rdata')    # base=77;sets=c((78+9):(78+9+7))  
bsata1_2=bs1; bsata2_2=bs2; bsata3_2=bs3 
bmata1_2=bm1; bmata2_2=bm2; bmata3_2=bm3 
msata1_2=ms1; msata2_2=ms2; msata3_2=ms3 
mmata1_2=mm1; mmata2_2=mm2; mmata3_2=mm3
load('simulations/simruns/irt_pred-3a.Rdata')   # base=77;sets=c((78+9+8):(78+9+8+8))
bsata1_3=bs1; bsata2_3=bs2; bsata3_3=bs3 
bmata1_3=bm1; bmata2_3=bm2; bmata3_3=bm3 
msata1_3=ms1; msata2_3=ms2; msata3_3=ms3 
mmata1_3=mm1; mmata2_3=mm2; mmata3_3=mm3
load('simulations/simruns/irt_pred-4a.Rdata')   # base=77;sets=c((78+9+8+9):(78+9+8+9+7))
bsata1_4=bs1; bsata2_4=bs2; bsata3_4=bs3 
bmata1_4=bm1; bmata2_4=bm2; bmata3_4=bm3 
msata1_4=ms1; msata2_4=ms2; msata3_4=ms3 
mmata1_4=mm1; mmata2_4=mm2; mmata3_4=mm3

mm_mat1=ms_mat1=bm_mat1=bs_mat1=matrix(NA,1,3) 
mm_mat2=ms_mat2=bm_mat2=bs_mat2=matrix(NA,1,3) 
mm_mat3=ms_mat3=bm_mat3=bs_mat3=matrix(NA,1,3) 
cut_mat=ideal_mat=matrix(NA,1,4) 
 
colnames(cut_mat)=colnames(ideal_mat)=c("k","V1","V2","V3")

for(k in 78:111){  
	if(k>=78 & k<=86){
		ideal_mat=rbind(ideal_mat,cbind(k,ideal_1[[k]]))  
		cut_mat=rbind(cut_mat,cbind(k,cutpoint_1[[k]]))   
		       
		cols=ncol(bsata1_1[[k]]) 					

		bs_mat1=rbind(bs_mat1,cbind(k,colMeans(bsata1_1[[k]]/cols),colVars(bsata1_1[[k]]/cols))) 
		bm_mat1=rbind(bm_mat1,cbind(k,colMeans(bmata1_1[[k]]/cols),colVars(bmata1_1[[k]]/cols)))
		mm_mat1=rbind(mm_mat1,cbind(k,colMeans(mmata1_1[[k]]/cols),colVars(mmata1_1[[k]]/cols)))
		ms_mat1=rbind(ms_mat1,cbind(k,colMeans(msata1_1[[k]]/cols),colVars(msata1_1[[k]]/cols)))

		bs_mat2=rbind(bs_mat2,cbind(k,colMeans(bsata2_1[[k]]/cols),colVars(bsata2_1[[k]]/cols))) 
		bm_mat2=rbind(bm_mat2,cbind(k,colMeans(bmata2_1[[k]]/cols),colVars(bmata2_1[[k]]/cols)))
		mm_mat2=rbind(mm_mat2,cbind(k,colMeans(mmata2_1[[k]]/cols),colVars(mmata2_1[[k]]/cols)))
		ms_mat2=rbind(ms_mat2,cbind(k,colMeans(msata2_1[[k]]/cols),colVars(msata2_1[[k]]/cols)))

		bs_mat3=rbind(bs_mat3,cbind(k,colMeans(bsata3_1[[k]]/cols),colVars(bsata3_1[[k]]/cols))) 
		bm_mat3=rbind(bm_mat3,cbind(k,colMeans(bmata3_1[[k]]/cols),colVars(bmata3_1[[k]]/cols)))
		mm_mat3=rbind(mm_mat3,cbind(k,colMeans(mmata3_1[[k]]/cols),colVars(mmata3_1[[k]]/cols)))
		ms_mat3=rbind(ms_mat3,cbind(k,colMeans(msata3_1[[k]]/cols),colVars(msata3_1[[k]]/cols))) 		
		
	}
	if(k>=87 & k<=94){     
		ideal_mat=rbind(ideal_mat,cbind(k,ideal_2[[k]]))  
		cut_mat=rbind(cut_mat,cbind(k,cutpoint_2[[k]]))   
		   
		cols=ncol(bsata1_2[[k]]) 			
				
		bs_mat1=rbind(bs_mat1,cbind(k,colMeans(bsata1_2[[k]]/cols),colVars(bsata1_2[[k]]/cols))) 
		bm_mat1=rbind(bm_mat1,cbind(k,colMeans(bmata1_2[[k]]/cols),colVars(bmata1_2[[k]]/cols)))
		mm_mat1=rbind(mm_mat1,cbind(k,colMeans(mmata1_2[[k]]/cols),colVars(mmata1_2[[k]]/cols)))
		ms_mat1=rbind(ms_mat1,cbind(k,colMeans(msata1_2[[k]]/cols),colVars(msata1_2[[k]]/cols)))

		bs_mat2=rbind(bs_mat2,cbind(k,colMeans(bsata2_2[[k]]/cols),colVars(bsata2_2[[k]]/cols))) 
		bm_mat2=rbind(bm_mat2,cbind(k,colMeans(bmata2_2[[k]]/cols),colVars(bmata2_2[[k]]/cols)))
		mm_mat2=rbind(mm_mat2,cbind(k,colMeans(mmata2_2[[k]]/cols),colVars(mmata2_2[[k]]/cols)))
		ms_mat2=rbind(ms_mat2,cbind(k,colMeans(msata2_2[[k]]/cols),colVars(msata2_2[[k]]/cols)))

		bs_mat3=rbind(bs_mat3,cbind(k,colMeans(bsata3_2[[k]]/cols),colVars(bsata3_2[[k]]/cols))) 
		bm_mat3=rbind(bm_mat3,cbind(k,colMeans(bmata3_2[[k]]/cols),colVars(bmata3_2[[k]]/cols)))
		mm_mat3=rbind(mm_mat3,cbind(k,colMeans(mmata3_2[[k]]/cols),colVars(mmata3_2[[k]]/cols)))
		ms_mat3=rbind(ms_mat3,cbind(k,colMeans(msata3_2[[k]]/cols),colVars(msata3_2[[k]]/cols)))			
		
	}   
	if(k>=95 & k<=103){
		ideal_mat=rbind(ideal_mat,cbind(k,ideal_3[[k]]))  
		cut_mat=rbind(cut_mat,cbind(k,cutpoint_3[[k]])) 
		
		cols=ncol(bsata1_3[[k]]) 			                                                  
		
		bs_mat1=rbind(bs_mat1,cbind(k,colMeans(bsata1_3[[k]]/cols),colVars(bsata1_3[[k]]/cols))) 
		bm_mat1=rbind(bm_mat1,cbind(k,colMeans(bmata1_3[[k]]/cols),colVars(bmata1_3[[k]]/cols)))
		mm_mat1=rbind(mm_mat1,cbind(k,colMeans(mmata1_3[[k]]/cols),colVars(mmata1_3[[k]]/cols)))
		ms_mat1=rbind(ms_mat1,cbind(k,colMeans(msata1_3[[k]]/cols),colVars(msata1_3[[k]]/cols)))

		bs_mat2=rbind(bs_mat2,cbind(k,colMeans(bsata2_3[[k]]/cols),colVars(bsata2_3[[k]]/cols))) 
		bm_mat2=rbind(bm_mat2,cbind(k,colMeans(bmata2_3[[k]]/cols),colVars(bmata2_3[[k]]/cols)))
		mm_mat2=rbind(mm_mat2,cbind(k,colMeans(mmata2_3[[k]]/cols),colVars(mmata2_3[[k]]/cols)))
		ms_mat2=rbind(ms_mat2,cbind(k,colMeans(msata2_3[[k]]/cols),colVars(msata2_3[[k]]/cols)))

		bs_mat3=rbind(bs_mat3,cbind(k,colMeans(bsata3_3[[k]]/cols),colVars(bsata3_3[[k]]/cols))) 
		bm_mat3=rbind(bm_mat3,cbind(k,colMeans(bmata3_3[[k]]/cols),colVars(bmata3_3[[k]]/cols)))
		mm_mat3=rbind(mm_mat3,cbind(k,colMeans(mmata3_3[[k]]/cols),colVars(mmata3_3[[k]]/cols)))
		ms_mat3=rbind(ms_mat3,cbind(k,colMeans(msata3_3[[k]]/cols),colVars(msata3_3[[k]]/cols)))
		 						
	}
	if(k>=104 & k<=111){
		ideal_mat=rbind(ideal_mat,cbind(k,ideal_4[[k]]))  
		cut_mat=rbind(cut_mat,cbind(k,cutpoint_4[[k]]))  	

		cols=ncol(bsata1_4[[k]]) 			

		bs_mat1=rbind(bs_mat1,cbind(k,colMeans(bsata1_4[[k]]/cols),colVars(bsata1_4[[k]]/cols))) 
		bm_mat1=rbind(bm_mat1,cbind(k,colMeans(bmata1_4[[k]]/cols),colVars(bmata1_4[[k]]/cols)))
		mm_mat1=rbind(mm_mat1,cbind(k,colMeans(mmata1_4[[k]]/cols),colVars(mmata1_4[[k]]/cols)))
		ms_mat1=rbind(ms_mat1,cbind(k,colMeans(msata1_4[[k]]/cols),colVars(msata1_4[[k]]/cols)))

		bs_mat2=rbind(bs_mat2,cbind(k,colMeans(bsata2_4[[k]]/cols),colVars(bsata2_4[[k]]/cols))) 
		bm_mat2=rbind(bm_mat2,cbind(k,colMeans(bmata2_4[[k]]/cols),colVars(bmata2_4[[k]]/cols)))
		mm_mat2=rbind(mm_mat2,cbind(k,colMeans(mmata2_4[[k]]/cols),colVars(mmata2_4[[k]]/cols)))
		ms_mat2=rbind(ms_mat2,cbind(k,colMeans(msata2_4[[k]]/cols),colVars(msata2_4[[k]]/cols)))

		bs_mat3=rbind(bs_mat3,cbind(k,colMeans(bsata3_4[[k]]/cols),colVars(bsata3_4[[k]]/cols))) 
		bm_mat3=rbind(bm_mat3,cbind(k,colMeans(bmata3_4[[k]]/cols),colVars(bmata3_4[[k]]/cols)))
		mm_mat3=rbind(mm_mat3,cbind(k,colMeans(mmata3_4[[k]]/cols),colVars(mmata3_4[[k]]/cols)))
		ms_mat3=rbind(ms_mat3,cbind(k,colMeans(msata3_4[[k]]/cols),colVars(msata3_4[[k]]/cols)))  

	}
}	 
         

# chop off pre-84th congress.. [78-83...]
ideal_mat=ideal_mat[which(ideal_mat[,1]>83),]   
cut_mat=cut_mat[which(cut_mat[,1]>83),]

bs_mat1=bs_mat1[which(bs_mat1[,1]>83),]
bm_mat1=bm_mat1[which(bm_mat1[,1]>83),]
ms_mat1=ms_mat1[which(ms_mat1[,1]>83),]
mm_mat1=mm_mat1[which(mm_mat1[,1]>83),]                             

bs_mat2=bs_mat2[which(bs_mat2[,1]>83),]
bm_mat2=bm_mat2[which(bm_mat2[,1]>83),]
ms_mat2=ms_mat2[which(ms_mat2[,1]>83),]
mm_mat2=mm_mat2[which(mm_mat2[,1]>83),]
       
bs_mat3=bs_mat3[which(bs_mat3[,1]>83),]
bm_mat3=bm_mat3[which(bm_mat3[,1]>83),]
ms_mat3=ms_mat3[which(ms_mat3[,1]>83),]
mm_mat3=mm_mat3[which(mm_mat3[,1]>83),]   

# check that cutpoints and ideal points have same dimension as the repositionings data     
di=dim(ideal_mat)[1]
di == dim(mm_mat1)[1] & di == dim(mm_mat2)[1] & di == dim(mm_mat3)[1]
di == dim(ms_mat1)[1] & di == dim(ms_mat2)[1] & di == dim(ms_mat3)[1] 

ci=dim(cut_mat)[1] 
ci == dim(bm_mat1)[1] & ci == dim(bm_mat2)[1] & ci == dim(bm_mat3)[1] 
ci == dim(bs_mat1)[1] & ci == dim(bs_mat2)[1] & ci == dim(bs_mat3)[1]         
     
# barplots are boring
# avgs..


          
pdf(file="simulations/simruns/time_postions1.pdf")
# MC vote changes; box plots over time
y=mm_mat1[,2]*1000  
x=mm_mat1[,1]
qnt=tapply(y,x,quantile,na.rm=T,prob=.9)  
qv=array(NA,length(y))
for(i in 1:length(qnt)){
	qv[which(x==as.numeric(names(qnt)[i]))]=qnt[i]
}   
      
y[which(y>qv)]=qv[which(y>qv)]
boxplot(y ~ x, axes=F,range=50,col='lightgrey',xlab="Congress",ylab="Count of Switches",ylim=c(0,50))  
axis(1,at=c(1+4*0:7),label=c(84+4*0:7))
axis(2)        
dev.off()
             
pdf(file="simulations/simruns/time_postions2.pdf")      
y=mm_mat2[,2]*1000  
x=mm_mat2[,1] 
qnt=tapply(y,x,quantile,na.rm=T,prob=.9)  
qv=array(NA,length(y))
for(i in 1:length(qnt)){
	qv[which(x==as.numeric(names(qnt)[i]))]=qnt[i]
}   
     
y[which(y>qv)]=qv[which(y>qv)]    
boxplot(y ~ x, axes=F,range=50,col='lightgrey',xlab="Congress",ylab="Count of Switches",ylim=c(0,50))  
axis(1,at=c(1+4*0:7),label=c(84+4*0:7))
axis(2)       
dev.off()       
                

pdf(file="simulations/simruns/time_postions3.pdf")   
y=mm_mat3[,2]*1000
x=mm_mat3[,1]    
qnt=tapply(y,x,quantile,na.rm=T,prob=.9)  
qv=array(NA,length(y))
for(i in 1:length(qnt)){
	qv[which(x==as.numeric(names(qnt)[i]))]=qnt[i]
}   
    
y[which(y>qv)]=qv[which(y>qv)]    
boxplot(y ~ x, axes=F,range=50,col='lightgrey',xlab="Congress",ylab="Count of Switches",ylim=c(0,50))  
axis(1,at=c(1+4*0:7),label=c(84+4*0:7))
axis(2)
dev.off()                  


   

pdf(file="simulations/simruns/scale_postions1.pdf")  
set.seed(1005)
y=mm_mat1[,2]*1000
x=ideal_mat[,3]
ix=sample(1:length(y),replace=F,size=2000)
#y=y[ix]
#x=x[ix]
y=jitter(y,factor=100)
x=jitter(x,factor=100)
plot(y~x,axes=F,ylab='Count of Switches',xlab='Scaled Ideal Points',ylim=c(0,80))
axis(1)
axis(2,)
#lines(lowess(y~x,f=1/10),col='red',lty=2,lwd=2) 
g=lowess(y~x,f=1/10)$x[which(lowess(y~x,f=1/10)$y==max(lowess(y~x,f=1/10)$y))]
lines(col='red',lty=2,lwd=2,x=c(g,g),y=c(0,90))
dev.off()                   

pdf(file="simulations/simruns/scale_postions2.pdf")  
set.seed(1005)
y=mm_mat2[,2]*1000
x=ideal_mat[,3]
ix=sample(1:length(y),replace=F,size=2000)
#y=y[ix]
#x=x[ix]
y=jitter(y,factor=100)
x=jitter(x,factor=100)
plot(y~x,axes=F,ylab='Count of Switches',xlab='Scaled Ideal Points',ylim=c(0,80))
axis(1)
axis(2,)
#lines(lowess(y~x,f=1/10),col='red',lty=2,lwd=2) 
g=lowess(y~x,f=1/10)$x[which(lowess(y~x,f=1/10)$y==max(lowess(y~x,f=1/10)$y))]
lines(col='red',lty=2,lwd=2,x=c(g,g),y=c(0,90))
dev.off()           

pdf(file="simulations/simruns/scale_postions3.pdf")  
set.seed(1005)
y=mm_mat3[,2]*1000
x=ideal_mat[,3]
ix=sample(1:length(y),replace=F,size=2000)
#y=y[ix]
#x=x[ix]
y=jitter(y,factor=100)
x=jitter(x,factor=100)
plot(y~x,axes=F,ylab='Count of Switches',xlab='Scaled Ideal Points',ylim=c(0,80))
axis(1)
axis(2,)
#lines(lowess(y~x,f=1/10),col='red',lty=2,lwd=2) 
g=lowess(y~x,f=1/10)$x[which(lowess(y~x,f=1/10)$y==max(lowess(y~x,f=1/10)$y))]
lines(col='red',lty=2,lwd=2,x=c(g,g),y=c(0,90))
dev.off()       
                      
# switching moments  
# sampling from x=rnorm(mean=1.298,sd=.531,n=1000) 

# .75%
mean(mm_mat1[,2]*1000) 
# 3.821523 
            
# 1.5%
mean(mm_mat2[,2]*1000) 
# 7.655567 

# 2.5%
mean(mm_mat3[,2]*1000) 
# 12.78965 
 
# 2.5 % => 0.0325 on ideal-point shift  
#  - some human interpretations 

rm(list=ls())      
library(stringr)

nom_tab=read.table(stringsAsFactors=F,'nominate/nominate_per_congress.txt',sep='\t')
nom_cong=as.numeric(str_sub(nom_tab[,],1,5))
nom_icpsr=as.numeric(str_sub(nom_tab[,],6,10))
nom_dw1=as.numeric(str_sub(nom_tab[,],42,47))
                              
icp=nom_icpsr[which(nom_cong==110)]
vts=nom_dw1[which(nom_cong==110)]
 
icp=icp[which(!is.na(vts))] 
vts=vts[which(!is.na(vts))]

icp=icp[order(vts)] 
vts=vts[order(vts)]

shift_vts=shift_rank=shift_indx=matrix(NA,length(vts),2)
for(i in 1:nrow(shift_indx)){ 
	ix=which(vts>vts[i]+0.0325/3) 	
	if(length(ix)>0){
		ix=min(ix)		
		shift_indx[i,1]=icp[i]
		shift_indx[i,2]=icp[ix]         
	
		shift_rank[i,1]=i
		shift_rank[i,2]=ix
		
		shift_vts[i,1]=vts[i]
		shift_vts[i,2]=vts[ix]		
	}		
}
iy=which(shift_vts[,2]-shift_vts[,1]>.055/3)     
shift_vts[iy,]=c(NA,NA)
shift_rank[iy,]=c(NA,NA)
shift_indx[iy,]=c(NA,NA)   

# average rank shift 
kth=round(median(shift_rank[,2]-shift_rank[,1],na.rm=T))
# 11 rank positions 
kx=which(shift_rank[,2]-shift_rank[,1]==kth)

si=shift_indx[kx,]
#si=si[c(1,round(nrow(si)/2),nrow(si)),]
set.seed(1005)
si=si[sample(1:nrow(si),size=4),]
      
# 110 15438 47 4 NORTH C  100 PRICE       -0.362        
#  110 29776 13 6 NEW YOR  100 MEEKS       -0.383
     
# 110 29390 2415 OHIO     200 PRYCE        0.602
#  110 20115 2110 ILLINOI  200 KIRK         0.648
  
# 110 29720 22 7 INDIANA  100 CARSON      -0.446    
#  110 39307 40 3 VIRGINI  100 SCOTT       -0.425

# 110 29701 41 4 ALABAMA  200 ADERHOLT     0.464  
#  110 29522 31 4 IOWA     200 LATHAM       0.485
  
# PRICE (NC) to MEEKS (NY)
# PRYCE (OH) to KIRK (IL)

sen_tab=read.table(stringsAsFactors=F,'nominate/senate_nominate_110.txt',sep='\t')
sen_cong=as.numeric(str_sub(sen_tab[,],1,5))
sen_icpsr=as.numeric(str_sub(sen_tab[,],6,10))
sen_dw1=as.numeric(str_sub(sen_tab[,],42,52))
                                
sen_icpsr[which(sen_dw1==min(sen_dw1[which(sen_dw1 > sen_dw1[which(sen_icpsr==49300)] + 0.0325/3)]))]
# feinstein (CA) to conrad (ND)

sen_icpsr[which(sen_dw1==min(sen_dw1[which(sen_dw1 > sen_dw1[which(sen_icpsr==14921)] + 0.0325/3)]))]
# mcconnell (WV) to isakson (GA)
      
sen_icpsr[which(sen_dw1==min(sen_dw1[which(sen_dw1 > sen_dw1[which(sen_icpsr==49703)] + 0.0325/3)]))] 
# collins (ME) to specter (PA)q

# PRICE (NC) to MEEKS (NY) or FEINSTEIN to KLOBUCHAR 
# PRYCE (OH) to KIRK (IL) or MCCONNELL to ISAKSON 
 
# END       